library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(usmap)
library(maps)
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.3 v purrr 0.3.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::map() masks maps::map()
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(viridis)
## Loading required package: viridisLite
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.4-8, (SVN revision 845)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Users/16196/OneDrive/Documents/R/win-library/3.6/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Users/16196/OneDrive/Documents/R/win-library/3.6/rgdal/proj
## Linking to sp version: 1.4-1
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
require(janitor)
## Loading required package: janitor
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
require(reshape2)
## Loading required package: reshape2
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
require(tidyr)
require(rstan)
## Loading required package: rstan
## Loading required package: StanHeaders
## rstan (Version 2.19.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For improved execution time, we recommend calling
## Sys.setenv(LOCAL_CPPFLAGS = '-march=native')
## although this causes Stan to throw an error on a few processors.
##
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
##
## extract
require(rstanarm)
## Loading required package: rstanarm
## Loading required package: Rcpp
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## rstanarm (Version 2.19.2, packaged: 2019-10-01 20:20:33 UTC)
## - Do not expect the default priors to remain the same in future rstanarm versions.
## Thus, R scripts should specify priors explicitly, even if they are just the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
##
## Attaching package: 'rstanarm'
## The following object is masked from 'package:rstan':
##
## loo
require(bayesplot)
## Loading required package: bayesplot
## This is bayesplot version 1.7.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
Finaldata<- read.csv("time_series_final_Data.csv")
google<-read.csv("Google/googlefinal.csv")
names(google)
## [1] "X.1" "X" "Day"
## [4] "State" "ChinaVirusInterest" "KungFluInterest"
## [7] "Region"
Re-Cleaned
google <- google %>% select(-c(X.1))
Finaldata <- Finaldata %>% select(-c(X))
Finaldata <- Finaldata %>% mutate(Day= as.Date(Day))
Demographic: Our dataset, Demographic, was created by merging three other datasets which all contained different demographic and election information. We obtained the main portion of our demographic data from the US Census Bureau’s American Community Survey (ACS) which is an ongoing survey administered by the U.S. Census Bureau. It gathers information on income, employment, housing characteristics, etc, annually for all the 50 U.S. States on the county and state level. To access the county-level dataset we used the R package called Choroplethr which provides API connections to data sources like the ACS. The ACS County-Level dataset was then merged with a county-level election outcome dataset that was created by Tony McGoven. Tony’s dataset contained presidential election results for 2008,2012, and 2016 but we chose to focus solely on the most recent election,2016. That said, the 2016 election results at the county-level were scraped from results published by Townhall.com. However, the State of Alaska reports results at the precinct or state level so there was no county-level data available. Therefore, another dataset had to be created that contained the election results for Alaska and this was done using the official election results provided by the Alaska Division of Elections and was later merged in. The final dataset that was used came from Alicia Johnson and it contained information on a state’s political leaning. Meaning it categorizes each county as belonging to a blue/red/purple state based on the state categorizations at 279towin.
COVID-19 Cases The COVID-19 data is provided by The COVID Tracking Project(CTP). All of the data points come from state/district/territory public health authorities—or, occasionally, from trusted news reporting, official press conferences, or (very occasionally) tweets or Facebook updates from state public health authorities or governors. These numbers are updated daily at 4PM EST. The biggest weakness of this dataset is that there is no standardized methods for states to follow for data collection/report. For example, some states, like Oregon, provide the full set of numbers but others provide some or none of these numbers on an ongoing basis. Some crucial states in this outbreak, notably California, Washington, and New York, have not been regularly reporting their total number of people tested. The CTP aims to remedy this uncertainty in states by utilizing other reporting/measuring tools such as: “Directly asking state officials, watching news conferences, gleaning information from trusted news sources, and whatever else it takes to present reliable numbers.”
Google Search Interest This data set includes two search interest indexes over time, measuring how people in each of the state’s interest in searching either “Kung Flu” or “China Virus” based on the time frame selected in the search. This data is downloaded directly from Google Trends which uses the same technique to track the interest of all searches on the platform. The main downside to this data set is the method of the indexing which makes the comparison from state to state less meaningful since each state is guaranteed to have a 100-level interest on their peak day, and the actual unknown search values can vary greatly across different states.
dim(Finaldata)
## [1] 765 31
names(Finaldata)
## [1] "Day" "State"
## [3] "ChinaVirusInterest" "KungFluInterest"
## [5] "positive" "negative"
## [7] "death" "hospitalized"
## [9] "totalTestResults" "FIPS"
## [11] "StayAtHome_date" "Quarantine_Yes"
## [13] "polyname" "StateColor"
## [15] "total_2016" "dem_2016"
## [17] "gop_2016" "oth_2016"
## [19] "total_population" "percent_white"
## [21] "percent_black" "percent_asian"
## [23] "percent_hispanic" "per_capita_income"
## [25] "median_rent" "median_age"
## [27] "percent_democrat2016" "percent_republican2016"
## [29] "percent_other2016" "Winner"
## [31] "Region"
head(Finaldata)
## Day State ChinaVirusInterest KungFluInterest positive negative
## 1 2020-03-10 AK 27 0 0 23
## 2 2020-03-11 AK 0 0 0 46
## 3 2020-03-12 AK 26 0 0 46
## 4 2020-03-13 AK 28 0 1 59
## 5 2020-03-14 AK 30 0 1 143
## 6 2020-03-15 AK 61 0 1 143
## death hospitalized totalTestResults FIPS StayAtHome_date Quarantine_Yes
## 1 NA NA 23 2 03/28/2020 1
## 2 NA NA 46 2 03/28/2020 1
## 3 NA NA 46 2 03/28/2020 1
## 4 NA NA 60 2 03/28/2020 1
## 5 NA NA 144 2 03/28/2020 1
## 6 NA NA 144 2 03/28/2020 1
## polyname StateColor total_2016 dem_2016 gop_2016 oth_2016
## 1 alaska red 318608 116454 163387 38767
## 2 alaska red 318608 116454 163387 38767
## 3 alaska red 318608 116454 163387 38767
## 4 alaska red 318608 116454 163387 38767
## 5 alaska red 318608 116454 163387 38767
## 6 alaska red 318608 116454 163387 38767
## total_population percent_white percent_black percent_asian
## 1 741456 0.65 0.04 0.07
## 2 741456 0.65 0.04 0.07
## 3 741456 0.65 0.04 0.07
## 4 741456 0.65 0.04 0.07
## 5 741456 0.65 0.04 0.07
## 6 741456 0.65 0.04 0.07
## percent_hispanic per_capita_income median_rent median_age
## 1 0.07 34922 NA 34.7
## 2 0.07 34922 NA 34.7
## 3 0.07 34922 NA 34.7
## 4 0.07 34922 NA 34.7
## 5 0.07 34922 NA 34.7
## 6 0.07 34922 NA 34.7
## percent_democrat2016 percent_republican2016 percent_other2016 Winner
## 1 0.3655087 0.5128151 0.1216762 Republican
## 2 0.3655087 0.5128151 0.1216762 Republican
## 3 0.3655087 0.5128151 0.1216762 Republican
## 4 0.3655087 0.5128151 0.1216762 Republican
## 5 0.3655087 0.5128151 0.1216762 Republican
## 6 0.3655087 0.5128151 0.1216762 Republican
## Region
## 1 West
## 2 West
## 3 West
## 4 West
## 5 West
## 6 West
summary(Finaldata)
## Day State ChinaVirusInterest KungFluInterest
## Min. :2020-03-10 AK : 15 Min. : 0.00 Min. : 0.000
## 1st Qu.:2020-03-13 AL : 15 1st Qu.: 26.00 1st Qu.: 0.000
## Median :2020-03-17 AR : 15 Median : 37.00 Median : 0.000
## Mean :2020-03-17 AZ : 15 Mean : 37.24 Mean : 3.903
## 3rd Qu.:2020-03-21 CA : 15 3rd Qu.: 49.00 3rd Qu.: 5.000
## Max. :2020-03-24 CO : 15 Max. :100.00 Max. :58.000
## (Other):675
## positive negative death hospitalized
## Min. : 0.0 Min. : 0 Min. : 0.000 Min. : 0.00
## 1st Qu.: 10.0 1st Qu.: 94 1st Qu.: 1.000 1st Qu.: 3.75
## Median : 38.0 Median : 337 Median : 2.000 Median : 32.50
## Mean : 322.2 Mean : 1820 Mean : 8.728 Mean : 210.91
## 3rd Qu.: 152.0 3rd Qu.: 1447 3rd Qu.: 5.000 3rd Qu.: 73.75
## Max. :25665.0 Max. :65605 Max. :210.000 Max. :3234.00
## NA's :41 NA's :416 NA's :709
## totalTestResults FIPS StayAtHome_date Quarantine_Yes
## Min. : 0 Min. : 1.00 03/24/2020: 90 Min. :0.00
## 1st Qu.: 103 1st Qu.:16.00 03/28/2020: 75 1st Qu.:1.00
## Median : 380 Median :29.00 03/30/2020: 75 Median :1.00
## Mean : 2045 Mean :28.96 03/23/2020: 60 Mean :0.88
## 3rd Qu.: 1500 3rd Qu.:42.00 03/25/2020: 60 3rd Qu.:1.00
## Max. :91270 Max. :56.00 (Other) :300 Max. :1.00
## NA's :105 NA's :15
## polyname StateColor total_2016 dem_2016
## alabama : 15 blue :285 Min. : 248742 Min. : 55949
## alaska : 15 purple:165 1st Qu.: 730628 1st Qu.: 266827
## arizona : 15 red :315 Median :1922218 Median : 779535
## arkansas : 15 Mean :2489256 Mean :1188393
## california: 15 3rd Qu.:3208899 3rd Qu.:1534487
## colorado : 15 Max. :9631972 Max. :5931283
## (Other) :675
## gop_2016 oth_2016 total_population percent_white
## Min. : 11553 Min. : 8496 Min. : 570134 Min. :0.2300
## 1st Qu.: 345598 1st Qu.: 38767 1st Qu.: 1583364 1st Qu.:0.5900
## Median : 947934 Median : 91364 Median : 4361333 Median :0.7400
## Mean :1179254 Mean :121608 Mean : 6108975 Mean :0.7029
## 3rd Qu.:1535513 3rd Qu.:183694 3rd Qu.: 6819579 3rd Qu.:0.8300
## Max. :4681590 Max. :515968 Max. :37659181 Max. :0.9400
##
## percent_black percent_asian percent_hispanic per_capita_income
## Min. :0.0000 Min. :0.01000 Min. :0.0100 Min. :20618
## 1st Qu.:0.0300 1st Qu.:0.01000 1st Qu.:0.0400 1st Qu.:24635
## Median :0.0700 Median :0.02000 Median :0.0800 Median :26824
## Mean :0.1084 Mean :0.03765 Mean :0.1082 Mean :28098
## 3rd Qu.:0.1500 3rd Qu.:0.04000 3rd Qu.:0.1300 3rd Qu.:30469
## Max. :0.4900 Max. :0.37000 Max. :0.4700 Max. :45290
##
## median_rent median_age percent_democrat2016
## Min. : 448.0 Min. :29.60 Min. :0.2249
## 1st Qu.: 564.0 1st Qu.:36.30 1st Qu.:0.3609
## Median : 658.0 Median :37.60 Median :0.4670
## Mean : 714.3 Mean :37.66 Mean :0.4501
## 3rd Qu.: 838.0 3rd Qu.:39.00 3rd Qu.:0.5335
## Max. :1220.0 Max. :43.20 Max. :0.9285
## NA's :15
## percent_republican2016 percent_other2016 Winner Region
## Min. :0.04122 Min. :0.01937 Democrat :315 Midwest :195
## 1st Qu.:0.41161 1st Qu.:0.04160 Republican:450 Mountain :120
## Median :0.49064 Median :0.05071 Northeast:180
## Mean :0.49010 Mean :0.05976 South :195
## 3rd Qu.:0.58095 3rd Qu.:0.06991 West : 75
## Max. :0.70052 Max. :0.25598
##
colnames(Finaldata)
## [1] "Day" "State"
## [3] "ChinaVirusInterest" "KungFluInterest"
## [5] "positive" "negative"
## [7] "death" "hospitalized"
## [9] "totalTestResults" "FIPS"
## [11] "StayAtHome_date" "Quarantine_Yes"
## [13] "polyname" "StateColor"
## [15] "total_2016" "dem_2016"
## [17] "gop_2016" "oth_2016"
## [19] "total_population" "percent_white"
## [21] "percent_black" "percent_asian"
## [23] "percent_hispanic" "per_capita_income"
## [25] "median_rent" "median_age"
## [27] "percent_democrat2016" "percent_republican2016"
## [29] "percent_other2016" "Winner"
## [31] "Region"
| Variables: | Description: |
|---|---|
|
State Name |
|
Political Leaning |
|
Percent of the Population that is Hispanic |
|
Percent of the Population that is White |
|
Percent of the Population that is Asian |
|
Percent of Population that is Black |
|
Total State Population |
|
Income per Capita |
|
Percent of votes won by Democrat (Clinton) |
|
Percent of votes won by Republican (Trump) |
|
Indicator for whether a Republican or Democrat Won |
|
Total Number of Votes |
|
Number of reported positive COVID-19 cases |
|
Number of reported negative COVID-19 cases |
|
Date of report |
|
Total Number of reported deaths due to COVID-19 |
|
Total Number of indivudals hopitalized due to COVID-19 |
|
Total Number test results (Positive +Negative) |
|
A five-digit Federal Information Processing Standards code which uniquely identified counties and county |
|
Interest index from google searches by state. Peak search day=100, all other days in set are based searches on relative to this peak day. |
|
Interest index from google searches by state. Peak search day=100, all other days in set are based searches on relative to this peak day. |
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
##
## wind
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#google_ts<-read.csv("Time_series_Google.csv")
day1<-as.Date("03/10/2020",format = "%m/%d/%Y")
day2<-as.Date("03/24/2020",format = "%m/%d/%Y")
google_ts<-google%>%
mutate(Day = as.Date(as.character(Day)))
a<-google_ts %>%
#filter(Day<=day2)%>%
#filter(Day>=day1)%>%
group_by(Region, Day) %>%
summarize(ChinaVirusSearch = median(ChinaVirusInterest)) %>%
ggplot(aes(x=Day, y=ChinaVirusSearch, color=Region))+
geom_point()
ggplotly(a)
#Demographic: Identification
Finaldata <- data.frame(Finaldata) %>% mutate(state = State)
plot_usmap(data = Finaldata, values = "percent_white", color = "white") +
scale_fill_continuous(name = "Percent White", label = scales::comma) +
theme(legend.position = "right")+ ggtitle("Percent of Residents that Identify as White") +
theme(
plot.title = element_text(color="Black", size=14, face="bold")
)
plot_usmap(data = Finaldata, values = "percent_asian", color = "white") +
scale_fill_continuous(low = "sky blue", high = "black", name = "Percent Asian", label = scales::comma) +
theme(legend.position = "right") + ggtitle("Percent of Residents that Identify as Asian") +
theme(
plot.title = element_text(color="Black", size=14, face="bold")
)
#By Results
plot_usmap(data = Finaldata, values = "percent_asian", color = "white") +
scale_fill_continuous(low = "sky blue", high = "black", name = "Percent Asian", label = scales::comma) +
theme(legend.position = "right") + ggtitle("Percent Asian by General Election Results")+
theme(
plot.title = element_text(color="Black", size=14, face="bold")
) + facet_wrap(~Winner)
#By income
plot_usmap(data = Finaldata, values = "per_capita_income", color = "white") +
scale_fill_continuous(low = "sky blue", high = "black", name = "$ Per Capita", label = scales::comma) +
theme(legend.position = "right") + ggtitle("Percent Capita Income")+
theme(
plot.title = element_text(color="Black", size=14, face="bold")
)
#Income by state affiliation
ggplot(Finaldata, aes(x = per_capita_income, fill = StateColor)) +
geom_density(alpha = .8)+ ggtitle("Income per Capita") + xlab("$") + ylab("Density")
#Trump Support
ggplot(Finaldata, aes(x = percent_republican2016)) +
geom_density()+ ggtitle("Trump Support During the 2016 Elections") + xlab("Percent of Trump Support") + ylab("Density")
ggplot(Finaldata, aes(x = percent_asian, y = percent_republican2016, color = Winner)) +
scale_color_manual(values = c("blue","purple","red")) +
geom_point(alpha = 0.8) + geom_text(aes(label=ifelse(percent_asian>.2,as.character(state),"")),hjust=1.2,vjust=0)+ geom_text(aes(label=ifelse(percent_republican2016<.2,as.character(state),"")),hjust=-.1,vjust=0) + ggtitle("Percent of Asians and Trump Support by Election Outcome") + xlab("Percent Asian") + ylab("Trump Support")
ggplot(Finaldata, aes(x = per_capita_income)) +
geom_density()+ ggtitle("Per Capita Income") + xlab("Per Capita Income") + ylab("Density") + facet_wrap(~Winner)
## Creating a df of just the cases the week before 3/17
weekbefore <- data.frame(Finaldata) %>% filter(Day <= as.Date("2020-03-17"))
plot1<- plot_usmap(data = weekbefore, values = "positive", color = "white") +
scale_fill_continuous(name = "Cases", label = scales::comma) +
theme(legend.position = "right")+ ggtitle("# of COVID Cases Before 3/17") +
theme(
plot.title = element_text(color="Black", size=8)
)
## Creating a df of just the cases the week after 3/17
weekafter <- data.frame(Finaldata) %>% filter(Day >= as.Date("2020-03-17"))
plot2<-plot_usmap(data = weekafter, values = "positive", color = "white") +
scale_fill_continuous(name = "Cases", label = scales::comma) +
theme(legend.position = "right")+ ggtitle("# of COVID Cases After 3/17") +
theme(
plot.title = element_text(color="Black", size=5)
)
# The number of Positive Covid Cases, comparison before and after the China Virus announcement
grid.arrange(plot1, plot2, ncol=2)
# Positive COVID cases aggregated by party
ggplot(Finaldata, aes(x = positive, fill = StateColor)) +
geom_density(alpha = .8)+ ggtitle("Positive COVID-19 Cases by State Party") + xlab("Cases") + ylab("Density")+xlim(0,500)
## Warning: Removed 81 rows containing non-finite values (stat_density).
# ## Positive COVID cases map before and after week
# plot3<-statebins(state_data = weekbefore, state_col = "State",
# text_color = "white", value_col = "positive",
# brewer_pal="Spectral", font_size = 3,breaks =7,
# legend_title="Week Before",
# labels = c("0","1-5","5-10","10-20","20-30","30-40","50-100"))
#
# ### Drastic scale change, needs work for comparison sake
# plot4<-statebins(state_data = weekafter, state_col = "state",
# text_color = "white", value_col = "positive",
# brewer_pal="Spectral", font_size = 3,breaks =6,
# legend_title="Week After",labels = c("1-100", "100-500", "500-5000", "5000-15000", "15000-25000", "25000-35000"))
#
# plot3
# plot4
ggplot(Finaldata, aes(x = ChinaVirusInterest, fill = as.factor(State))) +
geom_density(alpha = 0.5)
ggplot(Finaldata, aes(x = Day, y = ChinaVirusInterest, color = factor(State))) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_wrap(~ State) +
theme(legend.position = "none")